-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[SLP]Initial support for copyable elements #147366
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[SLP]Initial support for copyable elements #147366
Conversation
Created using spr 1.3.5
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Alexey Bataev (alexey-bataev) ChangesAdds initial support for copyable elements, both schedulable and Patch is 117.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/147366.diff 11 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c93af749507f8..c5a6de40fa72b 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -206,6 +206,11 @@ static cl::opt<bool> VectorizeNonPowerOf2(
"slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden,
cl::desc("Try to vectorize with non-power-of-2 number of elements."));
+static cl::opt<bool> VectorizeCopyableElements(
+ "slp-copyable-elements", cl::init(true), cl::Hidden,
+ cl::desc("Try to replace values with the idempotent instructions for "
+ "better vectorization."));
+
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
@@ -519,17 +524,17 @@ static bool isSplat(ArrayRef<Value *> VL) {
/// instructions, we need to use the converted opcode along with the original
/// uses.
/// \param I The instruction to check for commutativity
-/// \param InstWithUses The instruction whose uses are analyzed for special
+/// \param ValWithUses The value whose uses are analyzed for special
/// patterns
-static bool isCommutative(Instruction *I, Instruction *InstWithUses) {
+static bool isCommutative(Instruction *I, Value *ValWithUses) {
if (auto *Cmp = dyn_cast<CmpInst>(I))
return Cmp->isCommutative();
if (auto *BO = dyn_cast<BinaryOperator>(I))
return BO->isCommutative() ||
(BO->getOpcode() == Instruction::Sub &&
- !InstWithUses->hasNUsesOrMore(UsesLimit) &&
+ !ValWithUses->hasNUsesOrMore(UsesLimit) &&
all_of(
- InstWithUses->uses(),
+ ValWithUses->uses(),
[](const Use &U) {
// Commutative, if icmp eq/ne sub, 0
CmpPredicate Pred;
@@ -546,8 +551,8 @@ static bool isCommutative(Instruction *I, Instruction *InstWithUses) {
Flag->isOne());
})) ||
(BO->getOpcode() == Instruction::FSub &&
- !InstWithUses->hasNUsesOrMore(UsesLimit) &&
- all_of(InstWithUses->uses(), [](const Use &U) {
+ !ValWithUses->hasNUsesOrMore(UsesLimit) &&
+ all_of(ValWithUses->uses(), [](const Use &U) {
return match(U.getUser(),
m_Intrinsic<Intrinsic::fabs>(m_Specific(U.get())));
}));
@@ -564,6 +569,19 @@ static bool isCommutative(Instruction *I, Instruction *InstWithUses) {
/// \returns true if the instruction is commutative, false otherwise
static bool isCommutative(Instruction *I) { return isCommutative(I, I); }
+/// \returns number of operands of \p I, considering commutativity. Returns 2
+/// for commutative instrinsics.
+/// \param I The instruction to check for commutativity
+static unsigned getNumberOfPotentiallyCommutativeOps(Instruction *I) {
+ if (isa<IntrinsicInst>(I) && isCommutative(I)) {
+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
+ // arguments to the intrinsic produces the same result.
+ constexpr unsigned IntrinsicNumOperands = 2;
+ return IntrinsicNumOperands;
+ }
+ return I->getNumOperands();
+}
+
template <typename T>
static std::optional<unsigned> getInsertExtractIndex(const Value *Inst,
unsigned Offset) {
@@ -855,6 +873,23 @@ static std::optional<unsigned> getExtractIndex(const Instruction *E) {
return *EI->idx_begin();
}
+namespace llvm {
+/// Checks if the provided value does not require scheduling. It does not
+/// require scheduling if this is not an instruction or it is an instruction
+/// that does not read/write memory and all operands are either not instructions
+/// or phi nodes or instructions from different blocks.
+static bool areAllOperandsNonInsts(Value *V);
+/// Checks if the provided value does not require scheduling. It does not
+/// require scheduling if this is not an instruction or it is an instruction
+/// that does not read/write memory and all users are phi nodes or instructions
+/// from the different blocks.
+static bool isUsedOutsideBlock(Value *V);
+/// Checks if the specified value does not require scheduling. It does not
+/// require scheduling if all operands and all users do not need to be scheduled
+/// in the current basic block.
+static bool doesNotNeedToBeScheduled(Value *V);
+} // namespace llvm
+
namespace {
/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
@@ -957,6 +992,31 @@ class BinOpSameOpcodeHelper {
return Instruction::Xor;
llvm_unreachable("Cannot find interchangeable instruction.");
}
+ /// Return true if the \p Opcode is a candidate for interchange.
+ bool hasCandidateOpcode(unsigned Opcode) const {
+ MaskType Candidate = Mask & SeenBefore;
+ switch (Opcode) {
+ case Instruction::Shl:
+ return Candidate & ShlBIT;
+ case Instruction::AShr:
+ return Candidate & AShrBIT;
+ case Instruction::Mul:
+ return Candidate & MulBIT;
+ case Instruction::Add:
+ return Candidate & AddBIT;
+ case Instruction::Sub:
+ return Candidate & SubBIT;
+ case Instruction::And:
+ return Candidate & AndBIT;
+ case Instruction::Or:
+ return Candidate & OrBIT;
+ case Instruction::Xor:
+ return Candidate & XorBIT;
+ default:
+ break;
+ }
+ llvm_unreachable("Cannot find interchangeable instruction.");
+ }
SmallVector<Value *> getOperand(const Instruction *To) const {
unsigned ToOpcode = To->getOpcode();
unsigned FromOpcode = I->getOpcode();
@@ -1117,6 +1177,10 @@ class BinOpSameOpcodeHelper {
AltOp.trySet(OpcodeInMaskForm, InterchangeableMask));
}
unsigned getMainOpcode() const { return MainOp.getOpcode(); }
+ /// Return true if the \p Opcode is a candidate for interchange.
+ bool hasCandidateOpcode(unsigned Opcode) const {
+ return MainOp.hasCandidateOpcode(Opcode);
+ }
bool hasAltOp() const { return AltOp.I; }
unsigned getAltOpcode() const {
return hasAltOp() ? AltOp.getOpcode() : getMainOpcode();
@@ -1152,6 +1216,8 @@ class InstructionsState {
/// GetVectorCost.
Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;
+ /// Weather the instruction state represents copyable instructions.
+ bool HasCopyables = false;
public:
Instruction *getMainOp() const {
@@ -1190,9 +1256,11 @@ class InstructionsState {
if (!I->isBinaryOp())
return nullptr;
BinOpSameOpcodeHelper Converter(MainOp);
- if (Converter.add(I) && Converter.add(MainOp) && !Converter.hasAltOp())
- return MainOp;
- return AltOp;
+ if (!Converter.add(I) || !Converter.add(MainOp))
+ return nullptr;
+ if (Converter.hasAltOp() && !isAltShuffle())
+ return nullptr;
+ return Converter.hasAltOp() ? AltOp : MainOp;
}
/// Checks if main/alt instructions are shift operations.
@@ -1237,9 +1305,67 @@ class InstructionsState {
explicit operator bool() const { return valid(); }
InstructionsState() = delete;
- InstructionsState(Instruction *MainOp, Instruction *AltOp)
- : MainOp(MainOp), AltOp(AltOp) {}
+ InstructionsState(Instruction *MainOp, Instruction *AltOp,
+ bool HasCopyables = false)
+ : MainOp(MainOp), AltOp(AltOp), HasCopyables(HasCopyables) {}
static InstructionsState invalid() { return {nullptr, nullptr}; }
+
+ /// Checks if the value is a copyable element.
+ bool isCopyableElement(Value *V) const {
+ assert(valid() && "InstructionsState is invalid.");
+ if (!HasCopyables)
+ return false;
+ if (isAltShuffle() || getOpcode() == Instruction::GetElementPtr)
+ return false;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return !isa<PoisonValue>(V);
+ if (I->getParent() != MainOp->getParent() &&
+ (!isVectorLikeInstWithConstOps(I) ||
+ !isVectorLikeInstWithConstOps(MainOp)))
+ return true;
+ if (I->getOpcode() == MainOp->getOpcode())
+ return false;
+ if (!I->isBinaryOp())
+ return true;
+ BinOpSameOpcodeHelper Converter(MainOp);
+ return !Converter.add(I) || !Converter.add(MainOp) ||
+ Converter.hasAltOp() || !Converter.hasCandidateOpcode(getOpcode());
+ }
+
+ /// Checks if the value is non-schedulable.
+ bool isNonSchedulable(Value *V) const {
+ assert(valid() && "InstructionsState is invalid.");
+ auto *I = dyn_cast<Instruction>(V);
+ if (!HasCopyables)
+ return !I || isa<PHINode>(I) || isVectorLikeInstWithConstOps(I) ||
+ doesNotNeedToBeScheduled(V);
+ // MainOp for copyables always schedulable to correctly identify
+ // non-schedulable copyables.
+ if (getMainOp() == V)
+ return false;
+ if (isCopyableElement(V)) {
+ auto IsNonSchedulableCopyableElement = [this](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ return !I || isa<PHINode>(I) || I->getParent() != MainOp->getParent() ||
+ (doesNotNeedToBeScheduled(I) &&
+ // If the copyable instructions comes after MainOp
+ // (non-schedulable, but used in the block) - cannot vectorize
+ // it, will possibly generate use before def.
+ (isVectorLikeInstWithConstOps(I) || !MainOp->comesBefore(I)));
+ };
+
+ return IsNonSchedulableCopyableElement(V);
+ }
+ return !I || isa<PHINode>(I) || isVectorLikeInstWithConstOps(I) ||
+ doesNotNeedToBeScheduled(V);
+ }
+
+ /// Checks if the state represents copyable instructions.
+ bool areInstructionsWithCopyableElements() const {
+ assert(valid() && "InstructionsState is invalid.");
+ return HasCopyables;
+ }
};
std::pair<Instruction *, SmallVector<Value *>>
@@ -1767,6 +1893,7 @@ class BoUpSLP {
class TreeEntry;
class ScheduleEntity;
class ScheduleData;
+ class ScheduleCopyableData;
class ScheduleBundle;
class ShuffleCostEstimator;
class ShuffleInstructionBuilder;
@@ -2126,6 +2253,7 @@ class BoUpSLP {
operator bool() const { return UserTE != nullptr; }
};
+ friend struct DenseMapInfo<EdgeInfo>;
/// A helper class used for scoring candidates for two consecutive lanes.
class LookAheadHeuristics {
@@ -2890,18 +3018,14 @@ class BoUpSLP {
assert(S.valid() && "InstructionsState is invalid.");
// IntrinsicInst::isCommutative returns true if swapping the first "two"
// arguments to the intrinsic produces the same result.
- constexpr unsigned IntrinsicNumOperands = 2;
Instruction *MainOp = S.getMainOp();
unsigned NumOperands = MainOp->getNumOperands();
- ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
+ ArgSize = ::getNumberOfPotentiallyCommutativeOps(MainOp);
OpsVec.resize(ArgSize);
unsigned NumLanes = VL.size();
for (OperandDataVec &Ops : OpsVec)
Ops.resize(NumLanes);
for (unsigned Lane : seq<unsigned>(NumLanes)) {
- Value *V = VL[Lane];
- assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
- "Expected instruction or poison value");
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of V and whether the operand at OpIdx is the LHS or RHS
@@ -2912,17 +3036,24 @@ class BoUpSLP {
// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely tell
// the inverse operations by checking commutativity.
- if (isa<PoisonValue>(V)) {
+ auto *I = dyn_cast<Instruction>(VL[Lane]);
+ if (!I && isa<PoisonValue>(VL[Lane])) {
for (unsigned OpIdx : seq<unsigned>(NumOperands))
OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], true, false};
continue;
}
- auto [SelectedOp, Ops] = convertTo(cast<Instruction>(V), S);
- // We cannot check commutativity by the converted instruction
- // (SelectedOp) because isCommutative also examines def-use
- // relationships.
- bool IsInverseOperation =
- !isCommutative(SelectedOp, cast<Instruction>(V));
+ bool IsInverseOperation = false;
+ if (S.isCopyableElement(VL[Lane])) {
+ // The value is a copyable element.
+ IsInverseOperation = !isCommutative(MainOp, VL[Lane]);
+ } else {
+ assert(I && "Expected instruction");
+ auto [SelectedOp, Ops] = convertTo(I, S);
+ // We cannot check commutativity by the converted instruction
+ // (SelectedOp) because isCommutative also examines def-use
+ // relationships.
+ IsInverseOperation = !isCommutative(SelectedOp, I);
+ }
for (unsigned OpIdx : seq<unsigned>(ArgSize)) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], APO, false};
@@ -3792,6 +3923,9 @@ class BoUpSLP {
/// reordering of operands during buildTreeRec() and vectorizeTree().
SmallVector<ValueList, 2> Operands;
+ /// Copyable elements of the entry node.
+ SmallPtrSet<const Value *, 4> CopyableElements;
+
/// MainOp and AltOp are recorded inside. S should be obtained from
/// newTreeEntry.
InstructionsState S = InstructionsState::invalid();
@@ -3820,11 +3954,7 @@ class BoUpSLP {
void setInterleave(unsigned Factor) { InterleaveFactor = Factor; }
/// Marks the node as one that does not require scheduling.
- void setDoesNotNeedToSchedule() {
- assert(::doesNotNeedToSchedule(Scalars) &&
- "Expected to not need scheduling");
- DoesNotNeedToSchedule = true;
- }
+ void setDoesNotNeedToSchedule() { DoesNotNeedToSchedule = true; }
/// Returns true if the node is marked as one that does not require
/// scheduling.
bool doesNotNeedToSchedule() const { return DoesNotNeedToSchedule; }
@@ -3896,6 +4026,20 @@ class BoUpSLP {
bool hasState() const { return S.valid(); }
+ /// Add \p V to the list of copyable elements.
+ void addCopyableElement(Value *V) {
+ assert(S.isCopyableElement(V) && "Not a copyable element.");
+ CopyableElements.insert(V);
+ }
+
+ /// Returns true if \p V is a copyable element.
+ bool isCopyableElement(Value *V) const {
+ return CopyableElements.contains(V);
+ }
+
+ /// Returns true if any scalar in the list is a copyable element.
+ bool hasCopyableElements() const { return !CopyableElements.empty(); }
+
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
int findLaneForValue(Value *V) const {
@@ -3968,6 +4112,8 @@ class BoUpSLP {
for (Value *V : Scalars)
dbgs().indent(2) << *V << "\n";
dbgs() << "State: ";
+ if (S && hasCopyableElements())
+ dbgs() << "[[Copyable]] ";
switch (State) {
case Vectorize:
if (InterleaveFactor > 0) {
@@ -4145,12 +4291,20 @@ class BoUpSLP {
}
}
} else if (!Last->isGather()) {
- if (doesNotNeedToSchedule(VL))
+ if (isa<PHINode>(S.getMainOp()) ||
+ isVectorLikeInstWithConstOps(S.getMainOp()) ||
+ (!S.areInstructionsWithCopyableElements() &&
+ doesNotNeedToSchedule(VL)) ||
+ all_of(VL, [&](Value *V) { return S.isNonSchedulable(V); }))
Last->setDoesNotNeedToSchedule();
SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
if (isa<PoisonValue>(V))
continue;
+ if (S.isCopyableElement(V)) {
+ Last->addCopyableElement(V);
+ continue;
+ }
auto It = ScalarToTreeEntries.find(V);
if (It == ScalarToTreeEntries.end()) {
ScalarToTreeEntries.try_emplace(V).first->getSecond().push_back(Last);
@@ -4162,16 +4316,14 @@ class BoUpSLP {
}
}
// Update the scheduler bundle to point to this TreeEntry.
- assert((!Bundle.getBundle().empty() || isa<PHINode>(S.getMainOp()) ||
- isVectorLikeInstWithConstOps(S.getMainOp()) ||
- Last->doesNotNeedToSchedule()) &&
+ assert((!Bundle.getBundle().empty() || Last->doesNotNeedToSchedule()) &&
"Bundle and VL out of sync");
if (!Bundle.getBundle().empty()) {
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
auto *BundleMember = Bundle.getBundle().begin();
SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
- if (doesNotNeedToBeScheduled(V) || !Processed.insert(V).second)
+ if (S.isNonSchedulable(V) || !Processed.insert(V).second)
continue;
++BundleMember;
}
@@ -4280,7 +4432,8 @@ class BoUpSLP {
/// in general.
ScalarsVectorizationLegality
getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
- const EdgeInfo &UserTreeIdx) const;
+ const EdgeInfo &UserTreeIdx,
+ bool TryCopyableElementsVectorization) const;
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
@@ -4433,16 +4586,18 @@ class BoUpSLP {
/// List of hashes of vector of loads, which are known to be non vectorizable.
DenseSet<size_t> ListOfKnonwnNonVectorizableLoads;
- /// Represents a scheduling entity, either ScheduleData or ScheduleBundle.
- /// ScheduleData used to gather dependecies for a single instructions, while
- /// ScheduleBundle represents a batch of instructions, going to be groupped
- /// together.
+ /// Represents a scheduling entity, either ScheduleData, ScheduleCopyableData
+ /// or ScheduleBundle. ScheduleData used to gather dependecies for a single
+ /// instructions, while ScheduleBundle represents a batch of instructions,
+ /// going to be groupped together. ScheduleCopyableData models extra user for
+ /// "copyable" instructions.
class ScheduleEntity {
friend class ScheduleBundle;
friend class ScheduleData;
+ friend class ScheduleCopyableData;
protected:
- enum class Kind { ScheduleData, ScheduleBundle };
+ enum class Kind { ScheduleData, ScheduleBundle, ScheduleCopyableData };
Kind getKind() const { return K; }
ScheduleEntity(Kind K) : K(K) {}
@@ -4461,17 +4616,79 @@ class BoUpSLP {
void setSchedulingPriority(int Priority) { SchedulingPriority = Priority; }
int getSchedulingPriority() const { return SchedulingPriority; }
bool isReady() const {
- if (auto *SD = dyn_cast<ScheduleData>(this))
+ if (const auto *SD = dyn_cast<ScheduleData>(this))
return SD->isReady();
+ if (const auto *CD = dyn_cast<ScheduleCopyableData>(this))
+ return CD->isReady();
return cast<ScheduleBundle>(this)->isReady();
}
+ /// Returns true if the dependency information has been calculated.
+ /// Note that depenendency validity can vary between instructions within
+ /// a single bundle.
+ bool hasValidDependencies() const {
+ if (const auto *SD = dyn_cast<ScheduleData>(this))
+ return SD->hasValidDependencies();
+ if (const auto *CD = dyn_cast<ScheduleCopyableData>(this))
+ return CD->hasValidDependencies();
+ return cast<ScheduleBundle>(this)->hasValidDependencies();
+ }
+ /// Gets the number of unscheduled dependencies.
+ int getUnscheduledDeps() const {
+ if (const auto *SD = dyn_cast<ScheduleData>(this))
+ return SD->getUnscheduledDeps();
+ if (const auto *CD = dyn_cast<ScheduleCopyableData>(this))
+ return CD->getUnscheduledDeps();
+ return cast<ScheduleBundle>(this)->unscheduledDepsInBundle();
+ }
+ /// Increments the number of unscheduled dependencies.
+ int incrementUnscheduledDeps(int Incr) {
+ if (auto *SD = dyn_cast<ScheduleData>(this))
+ return SD->incrementUnscheduledDeps(Incr);
+ return cast<ScheduleCopyableData>(this)->incrementUnscheduledDeps(Incr);
+ }
+ /// Gets...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll llvm/test/Transforms/SLPVectorizer/revec.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
Created using spr 1.3.5
Does this replace the previous patch? |
No, adds support for schedulable elements. It is based on the patch for non-schedulable copyables. |
Adds initial support for copyable elements, both schedulable and
non-schedulable.
Adds support only for add for now, other opcodes will added in future.
Still some cases are not handled, e.g. stores do not include this,
because currently do not check for copyable elements.